import networkx as nx
# Load the journal citation network from its GraphML file.
journal=nx.read_graphml("data/journal.graphml")
Esta es una red que representa relaciones de citado entre journals. No es la red total, lo que se tiene es:
# Size of the network as a pair: (number of edges, number of nodes).
journal.number_of_edges(), journal.number_of_nodes()
(4112, 232)
Esta red viene con atributos en los nodos y enlaces:
# Node attribute names, read from the first node only, without first
# collecting the key views of every node into a throwaway list.
# NOTE(review): assumes all nodes carry the same attributes — confirm.
next(iter(journal.nodes.values())).keys()
dict_keys(['label', 'description', 'cluster', 'x', 'y', 'issn', 'citations'])
# Edge attribute names, read from the first edge only, without first
# collecting the key views of every edge into a throwaway list.
# NOTE(review): assumes all edges carry the same attributes — confirm.
next(iter(journal.edges.values())).keys()
dict_keys(['weight', 'id'])
Esta es una red que indica el patrón de citación entre revistas, por lo que es de tipo dirigido:
# Check whether the graph object is directed.
nx.is_directed(journal)
True
Verifiquemos algunas particularidades:
# List the isolated nodes (nodes without any neighbour); an empty list means there are none.
list(nx.isolates(journal))
[]
# Overall reciprocity: share of edges u->v for which the opposite edge v->u also exists.
nx.overall_reciprocity(journal)
0.0
Esto indica que ningún enlace es recíproco: no existe un par de revistas que se citen mutuamente en esta red. Aquí el detalle por nodo:
import pandas as pd

# Reciprocity of every node, then a tally of how many nodes share each value.
node_reciprocity = nx.reciprocity(journal, nodes=journal.nodes)
reciprocity_table = pd.DataFrame.from_dict(node_reciprocity, orient='index')
reciprocity_table.value_counts()
0.0 232 dtype: int64
# True only when every node can reach every other node following edge directions.
nx.is_strongly_connected(journal)
False
Así, hay tantos componentes fuertemente conectados como nodos (cada nodo es su propio componente):
# Count the strongly connected components of the directed graph.
nx.number_strongly_connected_components(journal)
232
# Density: existing edges divided by the maximum number of edges the graph could hold.
nx.density(journal)
0.0767278698313181
La densidad es muy baja, lo cual corrobora lo hallado.
# Edge widths: rescale the 'weight' attribute so the heaviest edge is drawn
# 10 units wide.
weights = list(nx.get_edge_attributes(journal, 'weight').values())
max_weight = max(weights)  # computed once, not once per edge
weights = [10 * (w / max_weight) for w in weights]

# Import the class directly: the `colors` list defined below would otherwise
# rebind (shadow) the `matplotlib.colors` module name.
from matplotlib.colors import ListedColormap
myColMap = ListedColormap(['salmon', 'lime', 'blue', 'black', 'magenta'])

# One colour value per node, taken from its 'cluster' attribute and mapped
# through myColMap when drawing.
ATTRIBUTE_NAME = 'cluster'
colors = list(nx.get_node_attributes(journal, ATTRIBUTE_NAME).values())

import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(15, 9))
ax.axis("off")
# Random node placement: a baseline picture with no structural meaning.
plot_options = {'pos': nx.random_layout(journal),
                "node_size": 10,
                "with_labels": False,
                'width': weights,
                'edge_color': 'gainsboro',
                'cmap': myColMap,
                'node_color': colors}
nx.draw_networkx(journal, ax=ax, **plot_options)
Esta familia de layouts ayuda a graficar los nodos mezclando atracción y repulsión.
import matplotlib.pyplot as plt

# Same drawing as before, but with node positions from the spring layout.
spring_positions = nx.spring_layout(journal)

fig, ax = plt.subplots(figsize=(15, 9))
ax.axis("off")
plot_options = dict(
    pos=spring_positions,
    node_size=10,
    with_labels=False,
    width=weights,
    edge_color='gainsboro',
    cmap=myColMap,
    node_color=colors,
)
nx.draw(journal, ax=ax, **plot_options)
VOSviewer incluye unas coordenadas en su red, las cuales sirven para resaltar una estructura de su interés:
# Node positions stored in the file: each node's own 'x' and 'y' attributes.
# Reading both from the same attribute dict keeps every coordinate paired
# with its node; a node lacking either attribute raises KeyError instead of
# silently shifting the coordinates of the nodes after it.
nodePositions_vox = {node: (attrs['x'], attrs['y'])
                     for node, attrs in journal.nodes(data=True)}

fig, ax = plt.subplots(figsize=(15, 9))
plot_options = {'pos': nodePositions_vox,
                "node_size": 10,
                "with_labels": False,
                'width': weights,
                'edge_color': 'gainsboro',
                'cmap': myColMap,
                'node_color': colors}
ax.axis("off")
nx.draw_networkx(journal, ax=ax, **plot_options)
Como se ve, este layout sirve para diferenciar tipos de revistas.
De esa red podemos saber:
¿Quién tiene más vecinos?
a. conectados a él:
# Node with the highest in-degree, returned as a (node, in_degree) pair.
# max() keeps the first maximum it meets, which is the same element a stable
# descending sort would place first, without sorting every node.
max(journal.in_degree, key=lambda item: item[1])
('115', 38)
b. con los que él se conecta:
# Node with the highest out-degree, returned as a (node, out_degree) pair.
# max() keeps the first maximum it meets, which is the same element a stable
# descending sort would place first, without sorting every node.
max(journal.out_degree, key=lambda item: item[1])
('2', 114)
Podemos ubicar estos nodos particulares:
# Whole network in a neutral colour, with nodes '115' (red) and '2' (blue)
# overlaid as larger markers.
fig, ax = plt.subplots(figsize=(15, 9))
ax.axis("off")
plot_options = dict(
    node_size=10,
    with_labels=False,
    width=0.15,
    edge_color='grey',
    node_color='yellow',
)
nx.draw_networkx(journal, pos=nodePositions_vox, ax=ax, **plot_options)

highlighted_nodes = ['115', '2']
highlight_colors = ['r', 'b']
nx.draw_networkx_nodes(
    journal,
    nodePositions_vox,
    nodelist=highlighted_nodes,
    ax=ax,
    node_color=highlight_colors,
    node_size=100,
)
<matplotlib.collections.PathCollection at 0x15c563d10>
Guardemos todos los valores anteriores:
import pandas as pd

# Degree of every node as pandas Series indexed by node id.
in_degree_by_node = {node: degree for node, degree in journal.in_degree()}
out_degree_by_node = {node: degree for node, degree in journal.out_degree()}
indegree = pd.Series(in_degree_by_node, name='indegree')
outdegree = pd.Series(out_degree_by_node, name='outdegree')
Guardemos como atributos del propio nodo:
# Compute both degree mappings first, then attach them to the nodes as the
# 'in_degree' and 'out_degree' attributes.
inde = dict(journal.in_degree())
outde = dict(journal.out_degree())
nx.set_node_attributes(journal, inde, "in_degree")
nx.set_node_attributes(journal, outde, "out_degree")

# Show the full attribute dicts of the two nodes singled out above.
journal.nodes['115'], journal.nodes['2']
({'label': 'ind corp change',
'description': 'INDUSTRIAL AND CORPORATE CHANGE',
'cluster': 1,
'x': 58.000988,
'y': 1029.203,
'issn': '0960-6491',
'citations': 85,
'in_degree': 38,
'out_degree': 34},
{'label': 'am econ rev',
'description': 'AMERICAN ECONOMIC REVIEW',
'cluster': 1,
'x': 395.52368,
'y': 595.9958,
'issn': '0002-8282',
'citations': 643,
'in_degree': 1,
'out_degree': 114})
Usemos esos valores para variar tamaño de nodos:
# Marker size is 10 x in-degree, listed in the graph's own node order.
in_degree_sizes = [attrs['in_degree'] * 10 for attrs in journal.nodes.values()]

fig, ax = plt.subplots(figsize=(15, 9))
ax.axis("off")
plot_options = dict(
    node_size=in_degree_sizes,
    with_labels=False,
    width=0.15,
    edge_color='grey',
    node_color='red',
)
nx.draw_networkx(journal, pos=nodePositions_vox, ax=ax, **plot_options)
# Marker size is 10 x out-degree, listed in the graph's own node order.
out_degree_sizes = [attrs['out_degree'] * 10 for attrs in journal.nodes.values()]

fig, ax = plt.subplots(figsize=(15, 9))
ax.axis("off")
plot_options = dict(
    node_size=out_degree_sizes,
    with_labels=False,
    width=0.15,
    edge_color='grey',
    node_color='blue',
)
nx.draw_networkx(journal, pos=nodePositions_vox, ax=ax, **plot_options)
Los nodos con indegree u outdegree igual a cero son interesantes. Los primeros son puertas de acceso; los segundos, puertas de salida.
# Code each node: 1 when its in-degree is 0, plus 2 when its out-degree is 0
# (so 0 means neither, 3 would mean both).
has_no_incoming = indegree.eq(0)
has_no_outgoing = outdegree.eq(0)
zeroDegree = pd.Series(1 * has_no_incoming + 2 * has_no_outgoing, name='zerodegree')
zeroDegree
1 1
2 0
3 0
4 0
5 0
..
228 0
229 0
230 2
231 0
232 2
Name: zerodegree, Length: 232, dtype: int64
# Draw the network with tiny markers, then overlay the nodes coded 1 in
# zeroDegree (in-degree 0, red) and those coded 2 (out-degree 0, blue).
fig, ax = plt.subplots(figsize=(15, 9))
ax.axis("off")
plot_options = {"node_size": 1, "with_labels": False, "width": 0.15,
                'edge_color': 'grey', 'node_color': 'yellow'}
nx.draw_networkx(journal, pos=nodePositions_vox, ax=ax, **plot_options)

# Plain lists are passed as nodelist rather than a pandas Index, whose truth
# value is ambiguous.
# NOTE(review): some networkx releases test `nodelist` for truthiness —
# confirm against the installed version.
no_incoming_nodes = zeroDegree[zeroDegree == 1].index.tolist()
no_outgoing_nodes = zeroDegree[zeroDegree == 2].index.tolist()
nx.draw_networkx_nodes(journal, nodePositions_vox, nodelist=no_incoming_nodes,
                       ax=ax, node_color=['r'], node_size=100)
nx.draw_networkx_nodes(journal, nodePositions_vox, nodelist=no_outgoing_nodes,
                       ax=ax, node_color=['b'], node_size=100)
<matplotlib.collections.PathCollection at 0x15be6ad90>
En sesiones anteriores vimos que la ubicación de los nodos les da cierta centralidad o relevancia en la red:
# Closeness is computed twice: on the graph as given (stored as "in") and on
# its reversed copy (stored as "out"). Edges are unweighted for this measure
# (no distance attribute is used).
closeness_centrality_in_dict = nx.closeness_centrality(journal)
closeness_centrality_out_dict = nx.closeness_centrality(journal.reverse())
betweenness_centrality_dict = nx.betweenness_centrality(journal)

# Attach each measure to the nodes under its attribute name.
for attribute_name, centrality_values in (
    ("in_closeness", closeness_centrality_in_dict),
    ("out_closeness", closeness_centrality_out_dict),
    ("betweenness", betweenness_centrality_dict),
):
    nx.set_node_attributes(journal, centrality_values, attribute_name)

# The same values as pandas Series, ready to be joined into a table.
closeness_centrality_in = pd.Series(closeness_centrality_in_dict, name='INcloseness')
closeness_centrality_out = pd.Series(closeness_centrality_out_dict, name='OUTcloseness')
betweenness_centrality = pd.Series(betweenness_centrality_dict, name='betweenness')
Grafiquemos:
# Marker size is 100 x the node's 'out_closeness' attribute.
out_closeness_sizes = [attrs['out_closeness'] * 100 for attrs in journal.nodes.values()]

fig, ax = plt.subplots(figsize=(15, 9))
ax.axis("off")
plot_options = dict(
    node_size=out_closeness_sizes,
    with_labels=False,
    width=0.15,
    edge_color='silver',
    node_color='k',
)
nx.draw_networkx(journal, pos=nodePositions_vox, ax=ax, **plot_options)
# Marker size is 1000 x the node's 'betweenness' attribute.
betweenness_sizes = [attrs['betweenness'] * 1000 for attrs in journal.nodes.values()]

fig, ax = plt.subplots(figsize=(15, 9))
ax.axis("off")
plot_options = dict(
    node_size=betweenness_sizes,
    with_labels=False,
    width=0.1,
    edge_color='silver',
    node_color='purple',
)
nx.draw_networkx(journal, pos=nodePositions_vox, ax=ax, **plot_options)
Hay algunos nodos que resaltan, podríamos verificar si hay puntos o enlaces de corte (que desconectarían a la red).
# Minimum node cut and minimum edge cut of the whole graph (no source/target
# pair is given, so these are global cuts).
node_cut=nx.minimum_node_cut(journal)
edge_cut=nx.minimum_edge_cut(journal)
# Number of nodes / edges in each cut.
len(node_cut),len(edge_cut)
(0, 0)
Vimos anteriormente que el patrón de la dirección de los enlaces permite visualizar nodos clave, en el sentido de tener 'contenido' atractivo.
# Link-analysis scores: HITS returns (hubs, authorities); PageRank one score per node.
h_dict, a_dict = nx.hits(journal)
pagerank_dict = nx.pagerank(journal)

# Attach the three scores to the nodes.
for attribute_name, scores in (
    ("HITS_hubs", h_dict),
    ("HITS_authorities", a_dict),
    ("Pagerank", pagerank_dict),
):
    nx.set_node_attributes(journal, scores, attribute_name)

# The same scores as pandas Series, shown side by side.
HITS_hubs = pd.Series(h_dict, name='HITS_hubs')
HITS_authorities = pd.Series(a_dict, name="HITS_authorities")
Pagerank = pd.Series(pagerank_dict, name='Pagerank')
pd.concat([HITS_hubs, HITS_authorities, Pagerank], axis="columns")
| | HITS_hubs | HITS_authorities | Pagerank |
|---|---|---|---|
| 1 | 0.033161 | -0.000000 | 0.001480 |
| 2 | 0.083688 | 0.000103 | 0.001483 |
| 3 | 0.182790 | 0.006601 | 0.001535 |
| 4 | 0.064329 | 0.002634 | 0.001495 |
| 5 | 0.030963 | 0.012210 | 0.001650 |
| ... | ... | ... | ... |
| 228 | 0.000013 | 0.000891 | 0.017221 |
| 229 | 0.000013 | 0.000674 | 0.012053 |
| 230 | -0.000000 | 0.001219 | 0.005440 |
| 231 | 0.000019 | 0.000502 | 0.028470 |
| 232 | -0.000000 | 0.000506 | 0.052718 |
232 rows × 3 columns
# Marker size is 100 x the node's 'HITS_hubs' attribute.
hub_sizes = [attrs['HITS_hubs'] * 100 for attrs in journal.nodes.values()]

fig, ax = plt.subplots(figsize=(15, 9))
ax.axis("off")
plot_options = dict(
    node_size=hub_sizes,
    with_labels=False,
    width=0.15,
    edge_color='silver',
    node_color='green',
)
nx.draw_networkx(journal, pos=nodePositions_vox, ax=ax, **plot_options)
# Marker size is 1000 x the node's 'Pagerank' attribute.
pagerank_sizes = [attrs['Pagerank'] * 1000 for attrs in journal.nodes.values()]

fig, ax = plt.subplots(figsize=(15, 9))
ax.axis("off")
plot_options = dict(
    node_size=pagerank_sizes,
    with_labels=False,
    width=0.15,
    edge_color='silver',
    node_color='magenta',
)
nx.draw_networkx(journal, pos=nodePositions_vox, ax=ax, **plot_options)
Abra en Gephi el archivo SeattleTop, calcule los estadísticos vistos en esta sesión y úselos para graficar.